Many of the figures and analysis methodology were based off of this tutorial from NYU. https://learn.gencore.bio.nyu.edu/rna-seq-analysis/gene-set-enrichment-analysis/

DESeq2 Differential Expression Analysis

# # Read Count Matrix
count.matrix <- read_tsv(trimws(sample_params$salmon_merged_gene_counts_file_path)) %>%
  mutate(across(3:ncol(.), ~ as.integer(.x))) %>%
  column_to_rownames("gene_id")

gene.reference <- dplyr::select(count.matrix, 1)

count.matrix <- count.matrix %>%
  dplyr::select(-1)
# Design ColData Matrix
sample <- colnames(count.matrix)
colData <- data.frame(sample)

# Add Columns for Condition to colData
for (i in colnames(metadata)){
  colData[i] <- metadata[i]
}

colData <- column_to_rownames(colData, "sample")
# Create DESeq Data Set
dds <- DESeqDataSetFromMatrix(
  countData = count.matrix,
  colData = colData,
  design = as.formula(sample_params$design)
)

# Set Reference Level
reflevelcond <- trimws(sample_params$ref_level_cond)
reflevelvalue <- trimws(sample_params$ref_level_value)
dds[[reflevelcond]] <- relevel(dds[[reflevelcond]], ref = reflevelvalue)

# Subset out genes with low overall counts
keep <- rowSums(counts(dds) >= 10) >= 4
dds <- dds[keep,]

# Run DESeq
dds <- DESeq(dds)

# Regularized Log Transform
rld <- rlog(dds)
if (as.logical(trimws(sample_params$batch_correct))) {
  
  # Check that condition for batch correction matches column metadata
  if(all(trimws(sample_params$batch_cond) != colnames(metadata))){
    stop(sprintf("The given condition for batch correction:%s, doesn't match any prior conditions: %s", sample_params$batch_cond, colnames(metadata)))
  }
  
  # Replicate / Batch Correction
  batch <- trimws(sample_params$batch_cond)
  
  batch.correct <- ComBat_seq(
    counts = as.matrix(count.matrix),
    batch = metadata[[batch]],
    group = metadata[[colnames(metadata)[colnames(metadata) != batch]]]) %>%
    as.data.frame()

  # Create DESeq Data Set
  batch.correct.dds <- DESeqDataSetFromMatrix(
    countData = batch.correct,
    colData = colData,
    design = as.formula(sample_params$design))
  
  # Set Factor Level
  reflevelcond <- trimws(sample_params$ref_level_cond)
  reflevelvalue <- trimws(sample_params$ref_level_value)
  batch.correct.dds[[reflevelcond]] <- relevel(batch.correct.dds[[reflevelcond]], ref = reflevelvalue)
  
  # Subset out genes with low overall counts
  keep <- rowSums(counts(batch.correct.dds) >= 10) >= 4
  batch.correct.dds <- batch.correct.dds[keep,]
  
  # Run DESeq
  batch.correct.dds <- DESeq(batch.correct.dds)
  
  # Regularized Log Transform
  batch.correct.rld <- rlog(batch.correct.dds)
  
  # Cleanup
  rm(batch, batch.correct)
}

# Cleanup
rm(keep)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
#  Logic here uses the reference level condition and value inputs to create   #
#  contrast terms assumed to compare treatment samples with control samples   #

# # Find relevant contrasts
contrasts <- list()

# Factor values of experimental condition
pattern <- factor(metadata[[reflevelcond]])
comp <- levels(pattern)

# Remove control state from experimental condition vector
comp <- comp[comp != reflevelvalue]

# Create Contrasts
for (i in 1:length(comp)){
  contrasts[[i]] <- c(reflevelcond, comp[i], reflevelvalue)
}
# Pull results from DESeq object with contrasts
for (i in 1:length(contrasts)){
  results[[i]] <- list()
  results[[i]][["DataFrame"]] <- results(dds.for.analysis, contrast = contrasts[[i]]) %>%
    data.frame() %>%
    rownames_to_column("gene_id") %>%
    mutate(gene_name = gene.reference[gene_id, 1], .before=baseMean) %>% # Add gene names to DESeq results matrices
    column_to_rownames("gene_id")
  
  results[[i]][["DESeqDataSet"]] <- results(dds.for.analysis, contrast = contrasts[[i]])
}

Principle Component Analysis

# View non-batch corrected
pca <- visualizePCsByCondition(
  rlog = rld,
  metadata = colData,
  title = "Before Replicate Batch Correction")

pca$logScaleScree

pca$linearScaleScree

pca$selectedPCs

# View Rep effect corrected
if(as.logical(trimws(sample_params$batch_correct))){
pca.batch <- visualizePCsByCondition(
    rlog = batch.correct.rld,
    metadata = colData,
    title = "After Replicate Batch Correction")

print(pca.batch$logScaleScree)
print(pca.batch$linearScaleScree)
print(pca.batch$selectedPCs)
}

strain_hrde1_vs_wt

Inputs for function call:

compileReport(result.obj = results[[1]], contrast = sprintf("%s_%s_vs_%s", 
    contrasts[[i]][1], contrasts[[i]][2], contrasts[[i]][3]), 
    l2fc_filter = 0.8, padj_filter = 0.1) 

DEG Report

Ontology Report

Transfering WORMBASE gene IDs to ENTREZID:

273 gene IDs were not transfered from WORMBASE to ENTREZID 
11510 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.1) is:

470

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.1) is:

361

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.1) is:

109

Dysregulated

Reactome Pathway GSEA

Translation

Mitochondrial translation termination

Mitochondrial translation

Mitochondrial translation elongation

Eukaryotic Translation Initiation

Cap-dependent Translation Initiation

GTP hydrolysis and joining of the 60S ribosomal subunit

L13a-mediated translational silencing of Ceruloplasmin expression

SRP-dependent cotranslational protein targeting to membrane

Formation of a pool of free 40S subunits

Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)

Metabolism of RNA

Nonsense-Mediated Decay (NMD)

Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)

Respiratory electron transport

Aerobic respiration and respiratory electron transport

mRNA Splicing - Major Pathway

Formation of the Early Elongation Complex

RNA Polymerase II Promoter Escape

RNA Polymerase II Transcription Pre-Initiation And Promoter Opening

RNA Polymerase II Transcription Initiation

RNA Polymerase II Transcription Initiation And Promoter Clearance

PI Metabolism

RNA Polymerase I Promoter Escape

RNA Polymerase I Promoter Clearance

RNA Polymerase I Transcription

RNA Polymerase II Pre-transcription Events

mRNA Splicing

Amino acids regulate mTORC1

Cellular response to starvation

GO GSEA

protein phosphorylation

phosphorylation

protein dephosphorylation

dephosphorylation

ribonucleoprotein complex biogenesis

ribosome biogenesis

tRNA metabolic process

immune system process

immune response

innate immune response

defense response to symbiont

male gamete generation

mitochondrial translation

rRNA metabolic process

RNA modification

rRNA processing

tRNA processing

developmental process involved in reproduction

spermatogenesis

cell fate commitment

mitochondrial gene expression

defense response

defense response to other organism

cellular respiration

mitochondrial respiratory chain complex assembly

cuticle development

establishment of protein localization to membrane

gamete generation

collagen and cuticulin-based cuticle development

mitochondrial respirasome assembly

Upregulated

Table Reactome Pathway Over-representation Analysis of Upregulated Genes has no rows.

Downregulated

Table Reactome Pathway Over-representation Analysis of Downregulated Genes has no rows.

strain_hrde2_vs_wt

Inputs for function call:

compileReport(result.obj = results[[1]], contrast = sprintf("%s_%s_vs_%s", 
    contrasts[[i]][1], contrasts[[i]][2], contrasts[[i]][3]), 
    l2fc_filter = 0.8, padj_filter = 0.1) 

DEG Report

Ontology Report

Transfering WORMBASE gene IDs to ENTREZID:

273 gene IDs were not transfered from WORMBASE to ENTREZID 
11510 gene IDs were successfully transfered from WORMBASE to ENTREZID 

Number of highly variable genes identified (with | L2FC | > 0.8 and padj < 0.1) is:

470

Number of highly upregulated genes identified (with L2FC > 0.8 and padj < 0.1) is:

361

Number of highly downregulated genes identified (with L2FC < -0.8 and padj < 0.1) is:

109

Dysregulated

Reactome Pathway GSEA

Translation

Mitochondrial translation termination

Mitochondrial translation

Mitochondrial translation elongation

Eukaryotic Translation Initiation

Cap-dependent Translation Initiation

L13a-mediated translational silencing of Ceruloplasmin expression

SRP-dependent cotranslational protein targeting to membrane

Formation of a pool of free 40S subunits

GTP hydrolysis and joining of the 60S ribosomal subunit

Nonsense Mediated Decay (NMD) independent of the Exon Junction Complex (EJC)

Metabolism of RNA

mRNA Splicing - Major Pathway

Nonsense-Mediated Decay (NMD)

Nonsense Mediated Decay (NMD) enhanced by the Exon Junction Complex (EJC)

Respiratory electron transport

GO GSEA

protein phosphorylation

phosphorylation

protein dephosphorylation

ribosome biogenesis

ribonucleoprotein complex biogenesis

dephosphorylation

tRNA metabolic process

immune response

innate immune response

defense response to symbiont

immune system process

rRNA processing

rRNA metabolic process

developmental process involved in reproduction

mitochondrial translation

male gamete generation

mitochondrial gene expression

tRNA processing

mitochondrial respiratory chain complex assembly

collagen and cuticulin-based cuticle development

mitochondrial respirasome assembly

establishment of protein localization to membrane

RNA modification

cellular respiration

spermatogenesis

defense response

response to biotic stimulus

response to external biotic stimulus

biological process involved in interspecies interaction between organisms

response to other organism

Upregulated

Table Reactome Pathway Over-representation Analysis of Upregulated Genes has no rows.

Downregulated

Table Reactome Pathway Over-representation Analysis of Downregulated Genes has no rows.